Introduction

Load libraries

Load data

# Read the survey export and list its columns and types.
# read.csv() already returns a data frame; the data.frame() wrapper is kept
# only as an explicit coercion.
clus_data <- data.frame(read.csv("clus_data.csv"))
str(clus_data)
## 'data.frame':    130 obs. of  43 variables:
##  $ X           : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ SUBTIME     : Factor w/ 125 levels "10.3.17 12:56",..: 114 115 116 117 118 119 120 121 122 123 ...
##  $ JOBFUNC     : int  5 3 4 6 4 6 3 2 6 1 ...
##  $ JOBFUNCOTHER: Factor w/ 14 levels "","Account Manager",..: 1 1 1 14 1 9 1 1 3 1 ...
##  $ JOBTIME     : int  48 60 12 36 9999 60 7 36 12 36 ...
##  $ GENDER      : Factor w/ 2 levels "F","M": 2 2 2 2 2 NA 2 1 2 2 ...
##  $ INVA        : int  2 2 3 3 1 4 5 1 2 3 ...
##  $ INVB        : int  NA 4 4 4 1 NA 5 5 4 5 ...
##  $ INVC        : int  4 2 3 3 1 NA 4 2 3 2 ...
##  $ INVD        : int  NA 2 3 3 1 NA 5 2 2 4 ...
##  $ INVE        : int  NA 4 3 3 5 NA 1 4 4 3 ...
##  $ INVF        : int  4 2 4 4 1 NA 4 3 3 4 ...
##  $ INVG        : int  2 2 1 2 5 NA 5 1 1 1 ...
##  $ INVH        : int  NA 4 2 3 5 NA 1 1 2 3 ...
##  $ INVI        : int  NA 4 2 4 5 NA 2 4 3 2 ...
##  $ INVJ        : int  2 5 2 1 2 NA 1 1 1 2 ...
##  $ INVK        : int  NA NA NA 3 4 NA 2 1 1 2 ...
##  $ INVL        : int  NA NA NA 3 5 NA 4 1 4 4 ...
##  $ INVM        : int  3 2 NA 2 4 NA 3 3 3 2 ...
##  $ INVN        : int  NA 2 NA 2 3 NA 2 2 3 3 ...
##  $ ROLE        : Factor w/ 4 levels "Developer","Manager",..: 1 2 1 1 1 3 2 4 2 1 ...
##  $ worktime    : int  48 60 12 36 NA 60 7 36 12 36 ...
##  $ age_range   : Factor w/ 5 levels "20 or less","21-30",..: 5 4 3 4 3 3 3 4 4 4 ...
##  $ gender      : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 NA 2 1 2 2 ...
##  $ team_size   : Factor w/ 5 levels "11-20","3-5",..: 1 1 1 1 1 3 NA 2 3 2 ...
##  $ USERA       : int  5 3 5 5 3 4 4 5 5 3 ...
##  $ USERB       : int  2 5 2 3 5 5 4 1 1 4 ...
##  $ USERC       : int  5 2 3 4 2 4 4 4 5 2 ...
##  $ USERD       : int  3 2 3 4 2 2 3 5 4 1 ...
##  $ USERE       : int  4 3 4 4 2 2 4 5 4 1 ...
##  $ USERF       : int  4 2 4 5 2 3 3 5 4 1 ...
##  $ INF.MNG     : int  4 3 NA 4 3 3 3 4 5 2 ...
##  $ INF.UX      : int  4 4 NA NA 3 4 3 5 4 2 ...
##  $ INF.DEV     : int  4 2 NA 4 3 3 2 4 4 2 ...
##  $ INF.SLF     : int  4 2 4 4 2 3 3 5 5 5 ...
##  $ condexp     : Factor w/ 4 levels "Never","Occasionally",..: 4 1 2 4 2 4 2 4 4 4 ...
##  $ X3.3A       : int  4 5 4 3 1 5 3 4 4 4 ...
##  $ X3.3B       : int  3 2 4 4 5 2 5 5 2 4 ...
##  $ X3.3C       : int  5 5 4 4 1 5 3 4 4 4 ...
##  $ X3.3D       : int  3 2 4 4 4 4 5 5 2 4 ...
##  $ X3.3E       : int  3 5 4 2 1 5 5 2 4 4 ...
##  $ X3.3F       : int  3 3 4 4 4 3 5 4 1 3 ...
##  $ COMPANY     : Factor w/ 4 levels "Ericsson","F-secure",..: 1 1 1 1 1 1 1 1 1 1 ...
# Column positions of the integer-coded questionnaire items
# (positions refer to the str() listing of clus_data).
cols <- c(
  7:20,   # INVA ... INVN
  26:35,  # USERA ... USERF and INF.MNG ... INF.SLF
  37:42   # X3.3A ... X3.3F
)
clus_data_selected <- clus_data[, cols, drop = FALSE]
str(clus_data_selected)
## 'data.frame':    130 obs. of  30 variables:
##  $ INVA   : int  2 2 3 3 1 4 5 1 2 3 ...
##  $ INVB   : int  NA 4 4 4 1 NA 5 5 4 5 ...
##  $ INVC   : int  4 2 3 3 1 NA 4 2 3 2 ...
##  $ INVD   : int  NA 2 3 3 1 NA 5 2 2 4 ...
##  $ INVE   : int  NA 4 3 3 5 NA 1 4 4 3 ...
##  $ INVF   : int  4 2 4 4 1 NA 4 3 3 4 ...
##  $ INVG   : int  2 2 1 2 5 NA 5 1 1 1 ...
##  $ INVH   : int  NA 4 2 3 5 NA 1 1 2 3 ...
##  $ INVI   : int  NA 4 2 4 5 NA 2 4 3 2 ...
##  $ INVJ   : int  2 5 2 1 2 NA 1 1 1 2 ...
##  $ INVK   : int  NA NA NA 3 4 NA 2 1 1 2 ...
##  $ INVL   : int  NA NA NA 3 5 NA 4 1 4 4 ...
##  $ INVM   : int  3 2 NA 2 4 NA 3 3 3 2 ...
##  $ INVN   : int  NA 2 NA 2 3 NA 2 2 3 3 ...
##  $ USERA  : int  5 3 5 5 3 4 4 5 5 3 ...
##  $ USERB  : int  2 5 2 3 5 5 4 1 1 4 ...
##  $ USERC  : int  5 2 3 4 2 4 4 4 5 2 ...
##  $ USERD  : int  3 2 3 4 2 2 3 5 4 1 ...
##  $ USERE  : int  4 3 4 4 2 2 4 5 4 1 ...
##  $ USERF  : int  4 2 4 5 2 3 3 5 4 1 ...
##  $ INF.MNG: int  4 3 NA 4 3 3 3 4 5 2 ...
##  $ INF.UX : int  4 4 NA NA 3 4 3 5 4 2 ...
##  $ INF.DEV: int  4 2 NA 4 3 3 2 4 4 2 ...
##  $ INF.SLF: int  4 2 4 4 2 3 3 5 5 5 ...
##  $ X3.3A  : int  4 5 4 3 1 5 3 4 4 4 ...
##  $ X3.3B  : int  3 2 4 4 5 2 5 5 2 4 ...
##  $ X3.3C  : int  5 5 4 4 1 5 3 4 4 4 ...
##  $ X3.3D  : int  3 2 4 4 4 4 5 5 2 4 ...
##  $ X3.3E  : int  3 5 4 2 1 5 5 2 4 4 ...
##  $ X3.3F  : int  3 3 4 4 4 3 5 4 1 3 ...
# Standardised copy of the selected items, restricted to the rows that have
# no missing value in any selected column.
clus_data_scaled <- scale(
  clus_data_selected[complete.cases(clus_data_selected), , drop = FALSE]
)

Correlation matrix

# Correlations between all selected items. With "pairwise.complete.obs" each
# coefficient is computed from the rows where both items are observed.
cor_matrix <- cor(clus_data_selected, use = "pairwise.complete.obs")
print(round(cor_matrix, digits = 2))
##          INVA  INVB  INVC  INVD  INVE  INVF  INVG  INVH  INVI  INVJ  INVK
## INVA     1.00 -0.09  0.55  0.46 -0.66  0.35  0.33 -0.01 -0.06 -0.04 -0.06
## INVB    -0.09  1.00 -0.18  0.01  0.12  0.10 -0.19 -0.18 -0.13 -0.10 -0.19
## INVC     0.55 -0.18  1.00  0.48 -0.53  0.29  0.31  0.01 -0.05  0.01  0.03
## INVD     0.46  0.01  0.48  1.00 -0.56  0.22  0.34 -0.05 -0.09  0.03 -0.08
## INVE    -0.66  0.12 -0.53 -0.56  1.00 -0.42 -0.39  0.03  0.03  0.04  0.13
## INVF     0.35  0.10  0.29  0.22 -0.42  1.00  0.33 -0.04  0.06 -0.19 -0.10
## INVG     0.33 -0.19  0.31  0.34 -0.39  0.33  1.00  0.11  0.08 -0.02 -0.02
## INVH    -0.01 -0.18  0.01 -0.05  0.03 -0.04  0.11  1.00  0.23  0.23  0.44
## INVI    -0.06 -0.13 -0.05 -0.09  0.03  0.06  0.08  0.23  1.00  0.10  0.14
## INVJ    -0.04 -0.10  0.01  0.03  0.04 -0.19 -0.02  0.23  0.10  1.00  0.19
## INVK    -0.06 -0.19  0.03 -0.08  0.13 -0.10 -0.02  0.44  0.14  0.19  1.00
## INVL    -0.12 -0.01 -0.09 -0.17  0.33 -0.20  0.10  0.14  0.11  0.15  0.34
## INVM    -0.06 -0.18  0.01 -0.09  0.06 -0.15 -0.05  0.22  0.14  0.07  0.43
## INVN    -0.08 -0.10 -0.06 -0.05  0.17 -0.07  0.01  0.21  0.05  0.10  0.12
## USERA   -0.13  0.08 -0.05  0.02  0.05 -0.03 -0.04 -0.02  0.00 -0.14 -0.14
## USERB    0.18 -0.14 -0.07  0.06 -0.02  0.03  0.15 -0.02  0.28  0.14  0.15
## USERC   -0.12  0.09  0.04 -0.07  0.23 -0.07 -0.04 -0.11 -0.09 -0.23 -0.11
## USERD   -0.16  0.13  0.07 -0.08  0.18 -0.30 -0.13 -0.18 -0.02 -0.14 -0.04
## USERE   -0.05  0.17  0.13  0.00 -0.05 -0.03  0.04 -0.14 -0.15 -0.11 -0.05
## USERF   -0.06  0.03  0.11 -0.10  0.12 -0.07 -0.06 -0.10  0.01 -0.19 -0.04
## INF.MNG -0.16 -0.06  0.11  0.05  0.11  0.13  0.02 -0.12 -0.08 -0.31 -0.19
## INF.UX  -0.14  0.09  0.08  0.03 -0.02  0.11  0.01 -0.07 -0.02 -0.37 -0.14
## INF.DEV -0.12  0.05  0.08  0.09  0.03  0.13  0.08 -0.07 -0.06 -0.29 -0.21
## INF.SLF -0.13  0.21  0.10  0.00  0.11 -0.06 -0.10 -0.14 -0.24 -0.32 -0.14
## X3.3A   -0.07  0.08 -0.12 -0.11  0.16 -0.03 -0.24 -0.11 -0.08  0.07 -0.08
## X3.3B    0.03 -0.03 -0.03 -0.10  0.03 -0.07  0.15 -0.03  0.12 -0.17 -0.06
## X3.3C    0.15  0.15  0.15  0.14 -0.12  0.06 -0.13  0.03 -0.03 -0.07 -0.08
## X3.3D    0.18  0.05  0.01 -0.16 -0.16  0.14  0.13 -0.17  0.16 -0.01 -0.09
## X3.3E    0.09  0.15  0.18  0.05 -0.01  0.11 -0.13  0.00 -0.06 -0.03 -0.07
## X3.3F    0.18 -0.05  0.06  0.10 -0.21  0.00  0.18 -0.01 -0.06  0.05 -0.11
##          INVL  INVM  INVN USERA USERB USERC USERD USERE USERF INF.MNG
## INVA    -0.12 -0.06 -0.08 -0.13  0.18 -0.12 -0.16 -0.05 -0.06   -0.16
## INVB    -0.01 -0.18 -0.10  0.08 -0.14  0.09  0.13  0.17  0.03   -0.06
## INVC    -0.09  0.01 -0.06 -0.05 -0.07  0.04  0.07  0.13  0.11    0.11
## INVD    -0.17 -0.09 -0.05  0.02  0.06 -0.07 -0.08  0.00 -0.10    0.05
## INVE     0.33  0.06  0.17  0.05 -0.02  0.23  0.18 -0.05  0.12    0.11
## INVF    -0.20 -0.15 -0.07 -0.03  0.03 -0.07 -0.30 -0.03 -0.07    0.13
## INVG     0.10 -0.05  0.01 -0.04  0.15 -0.04 -0.13  0.04 -0.06    0.02
## INVH     0.14  0.22  0.21 -0.02 -0.02 -0.11 -0.18 -0.14 -0.10   -0.12
## INVI     0.11  0.14  0.05  0.00  0.28 -0.09 -0.02 -0.15  0.01   -0.08
## INVJ     0.15  0.07  0.10 -0.14  0.14 -0.23 -0.14 -0.11 -0.19   -0.31
## INVK     0.34  0.43  0.12 -0.14  0.15 -0.11 -0.04 -0.05 -0.04   -0.19
## INVL     1.00  0.25  0.17 -0.07  0.05  0.16  0.07  0.06  0.06   -0.16
## INVM     0.25  1.00  0.06 -0.01 -0.08  0.02  0.04  0.00  0.10   -0.07
## INVN     0.17  0.06  1.00 -0.04 -0.04  0.02  0.06 -0.02 -0.05   -0.05
## USERA   -0.07 -0.01 -0.04  1.00 -0.28  0.46  0.34  0.34  0.36    0.24
## USERB    0.05 -0.08 -0.04 -0.28  1.00 -0.51 -0.33 -0.35 -0.34   -0.14
## USERC    0.16  0.02  0.02  0.46 -0.51  1.00  0.50  0.44  0.54    0.30
## USERD    0.07  0.04  0.06  0.34 -0.33  0.50  1.00  0.59  0.71    0.19
## USERE    0.06  0.00 -0.02  0.34 -0.35  0.44  0.59  1.00  0.68    0.37
## USERF    0.06  0.10 -0.05  0.36 -0.34  0.54  0.71  0.68  1.00    0.35
## INF.MNG -0.16 -0.07 -0.05  0.24 -0.14  0.30  0.19  0.37  0.35    1.00
## INF.UX  -0.16  0.06 -0.20  0.33 -0.36  0.26  0.35  0.27  0.39    0.34
## INF.DEV -0.09  0.03  0.03  0.39 -0.51  0.46  0.30  0.39  0.43    0.53
## INF.SLF  0.04 -0.01 -0.07  0.49 -0.53  0.62  0.44  0.46  0.49    0.37
## X3.3A    0.03  0.06  0.07  0.10  0.13  0.07 -0.08 -0.01 -0.01    0.15
## X3.3B    0.08 -0.12 -0.08  0.00  0.05 -0.03  0.11  0.00  0.05   -0.18
## X3.3C   -0.09 -0.10 -0.04  0.14  0.01  0.12  0.15  0.11  0.12    0.19
## X3.3D   -0.03  0.01  0.02 -0.06  0.01 -0.04  0.08  0.16  0.14    0.02
## X3.3E    0.08 -0.15 -0.07  0.03 -0.08  0.07  0.02  0.01  0.05    0.16
## X3.3F   -0.17 -0.17 -0.18  0.12 -0.08 -0.04  0.10  0.13  0.10   -0.01
##         INF.UX INF.DEV INF.SLF X3.3A X3.3B X3.3C X3.3D X3.3E X3.3F
## INVA     -0.14   -0.12   -0.13 -0.07  0.03  0.15  0.18  0.09  0.18
## INVB      0.09    0.05    0.21  0.08 -0.03  0.15  0.05  0.15 -0.05
## INVC      0.08    0.08    0.10 -0.12 -0.03  0.15  0.01  0.18  0.06
## INVD      0.03    0.09    0.00 -0.11 -0.10  0.14 -0.16  0.05  0.10
## INVE     -0.02    0.03    0.11  0.16  0.03 -0.12 -0.16 -0.01 -0.21
## INVF      0.11    0.13   -0.06 -0.03 -0.07  0.06  0.14  0.11  0.00
## INVG      0.01    0.08   -0.10 -0.24  0.15 -0.13  0.13 -0.13  0.18
## INVH     -0.07   -0.07   -0.14 -0.11 -0.03  0.03 -0.17  0.00 -0.01
## INVI     -0.02   -0.06   -0.24 -0.08  0.12 -0.03  0.16 -0.06 -0.06
## INVJ     -0.37   -0.29   -0.32  0.07 -0.17 -0.07 -0.01 -0.03  0.05
## INVK     -0.14   -0.21   -0.14 -0.08 -0.06 -0.08 -0.09 -0.07 -0.11
## INVL     -0.16   -0.09    0.04  0.03  0.08 -0.09 -0.03  0.08 -0.17
## INVM      0.06    0.03   -0.01  0.06 -0.12 -0.10  0.01 -0.15 -0.17
## INVN     -0.20    0.03   -0.07  0.07 -0.08 -0.04  0.02 -0.07 -0.18
## USERA     0.33    0.39    0.49  0.10  0.00  0.14 -0.06  0.03  0.12
## USERB    -0.36   -0.51   -0.53  0.13  0.05  0.01  0.01 -0.08 -0.08
## USERC     0.26    0.46    0.62  0.07 -0.03  0.12 -0.04  0.07 -0.04
## USERD     0.35    0.30    0.44 -0.08  0.11  0.15  0.08  0.02  0.10
## USERE     0.27    0.39    0.46 -0.01  0.00  0.11  0.16  0.01  0.13
## USERF     0.39    0.43    0.49 -0.01  0.05  0.12  0.14  0.05  0.10
## INF.MNG   0.34    0.53    0.37  0.15 -0.18  0.19  0.02  0.16 -0.01
## INF.UX    1.00    0.56    0.45 -0.01 -0.01  0.16 -0.05  0.18  0.01
## INF.DEV   0.56    1.00    0.68  0.03 -0.03  0.20  0.03  0.02  0.02
## INF.SLF   0.45    0.68    1.00  0.02  0.03  0.22  0.02  0.12 -0.05
## X3.3A    -0.01    0.03    0.02  1.00 -0.50  0.46 -0.08  0.20 -0.13
## X3.3B    -0.01   -0.03    0.03 -0.50  1.00 -0.27  0.35  0.01  0.22
## X3.3C     0.16    0.20    0.22  0.46 -0.27  1.00  0.07  0.25  0.06
## X3.3D    -0.05    0.03    0.02 -0.08  0.35  0.07  1.00 -0.06  0.32
## X3.3E     0.18    0.02    0.12  0.20  0.01  0.25 -0.06  1.00  0.02
## X3.3F     0.01    0.02   -0.05 -0.13  0.22  0.06  0.32  0.02  1.00
# Mixed view: coefficients as numbers below the diagonal, circles above it.
# order = "hclust" reorders the variables by hierarchical clustering of the
# correlation coefficients.
corrplot.mixed(
  cor_matrix,
  lower = "number",
  upper = "circle",
  order = "hclust"
)

# Simpler view with the same ordering; addrect = 5 outlines five clusters.
corrplot(
  cor_matrix,
  order = "hclust",
  addrect = 5
)

Get the statistically significant correlations (p < 0.05):

# Correlations with p-values for the complete-case, scaled data.
# NOTE(review): rcorr is presumably Hmisc::rcorr - the library chunk is not
# visible here.
correlations <- rcorr(as.matrix(clus_data_scaled))

# Print every variable pair whose p-value is below 0.05.
# The loop bounds come from the p-value matrix itself rather than a
# hard-coded 30, so the chunk keeps working if the column selection changes.
# Entries that are NA are skipped. Both [i, j] and [j, i] are visited, so
# each pair is printed twice (once in each order).
p_values <- correlations$P
for (i in seq_len(nrow(p_values))) {
  for (j in seq_len(ncol(p_values))) {
    p <- p_values[i, j]
    if (!is.na(p) && p < 0.05) {
      print(paste(rownames(p_values)[i], "-", colnames(p_values)[j], ": ", p))
    }
  }
}
## [1] "INVA - INVC :  5.97600688858613e-05"
## [1] "INVA - INVD :  8.89989073225017e-05"
## [1] "INVA - INVE :  3.84155813826226e-08"
## [1] "INVA - INVF :  0.00198188375699071"
## [1] "INVA - INVG :  0.00181498402122715"
## [1] "INVA - USERA :  0.0353299558107456"
## [1] "INVA - USERB :  0.0036006742456467"
## [1] "INVA - USERC :  0.0312598398747386"
## [1] "INVA - USERD :  0.0199488647352783"
## [1] "INVA - INF.SLF :  0.0412554118716966"
## [1] "INVB - INVH :  0.0219413904112731"
## [1] "INVB - INVK :  0.0258895287357712"
## [1] "INVB - INVM :  0.027868034378528"
## [1] "INVB - USERE :  0.0480961857970503"
## [1] "INVB - INF.SLF :  0.00873403508279336"
## [1] "INVB - X3.3E :  0.014731131345026"
## [1] "INVC - INVA :  5.97600688858613e-05"
## [1] "INVC - INVD :  6.46935275350202e-07"
## [1] "INVC - INVE :  0.000311494372586818"
## [1] "INVC - INVF :  0.0141054671985286"
## [1] "INVC - INVG :  0.0082405581477345"
## [1] "INVC - X3.3E :  0.0193337474844781"
## [1] "INVC - X3.3F :  0.0418212014638715"
## [1] "INVD - INVA :  8.89989073225017e-05"
## [1] "INVD - INVC :  6.46935275350202e-07"
## [1] "INVD - INVE :  3.49190905080121e-08"
## [1] "INVD - INVG :  0.000897161776227229"
## [1] "INVE - INVA :  3.84155813826226e-08"
## [1] "INVE - INVC :  0.000311494372586818"
## [1] "INVE - INVD :  3.49190905080121e-08"
## [1] "INVE - INVF :  0.00108847486518449"
## [1] "INVE - INVG :  0.00458732361555048"
## [1] "INVE - INVL :  0.00135806748739675"
## [1] "INVE - USERC :  0.00602840850829445"
## [1] "INVE - USERD :  0.0213030996404555"
## [1] "INVE - INF.SLF :  0.024334039746599"
## [1] "INVF - INVA :  0.00198188375699071"
## [1] "INVF - INVC :  0.0141054671985286"
## [1] "INVF - INVE :  0.00108847486518449"
## [1] "INVF - INVJ :  0.00708013807871888"
## [1] "INVF - X3.3E :  0.02474032352352"
## [1] "INVG - INVA :  0.00181498402122715"
## [1] "INVG - INVC :  0.0082405581477345"
## [1] "INVG - INVD :  0.000897161776227229"
## [1] "INVG - INVE :  0.00458732361555048"
## [1] "INVG - USERB :  0.0468498083014208"
## [1] "INVG - X3.3A :  0.0162703862893996"
## [1] "INVH - INVB :  0.0219413904112731"
## [1] "INVH - INVI :  0.00582602853296277"
## [1] "INVH - INVK :  8.93195067575014e-06"
## [1] "INVH - USERD :  0.0337626978285754"
## [1] "INVH - USERE :  0.00422267591765935"
## [1] "INVH - USERF :  0.031166497787726"
## [1] "INVH - X3.3D :  0.00816333431166094"
## [1] "INVI - INVH :  0.00582602853296277"
## [1] "INVI - USERB :  0.00251543695502021"
## [1] "INVI - INF.SLF :  0.0187977431290931"
## [1] "INVJ - INVF :  0.00708013807871888"
## [1] "INVJ - INF.MNG :  0.018037167998306"
## [1] "INVJ - INF.UX :  0.000932404587634883"
## [1] "INVJ - INF.DEV :  0.0282445092149526"
## [1] "INVJ - INF.SLF :  0.0284109796666756"
## [1] "INVK - INVB :  0.0258895287357712"
## [1] "INVK - INVH :  8.93195067575014e-06"
## [1] "INVK - INVL :  0.0333737968074144"
## [1] "INVK - INVM :  7.15269456774692e-05"
## [1] "INVK - INF.SLF :  0.0125418629938503"
## [1] "INVL - INVE :  0.00135806748739675"
## [1] "INVL - INVK :  0.0333737968074144"
## [1] "INVL - INVM :  0.0384085570445376"
## [1] "INVL - X3.3F :  0.00791216426280061"
## [1] "INVM - INVB :  0.027868034378528"
## [1] "INVM - INVK :  7.15269456774692e-05"
## [1] "INVM - INVL :  0.0384085570445376"
## [1] "USERA - INVA :  0.0353299558107456"
## [1] "USERA - USERB :  0.0147671851418938"
## [1] "USERA - USERC :  4.54691560456411e-05"
## [1] "USERA - USERD :  0.000301950446309762"
## [1] "USERA - USERE :  0.00345745114598728"
## [1] "USERA - USERF :  4.37381683067173e-05"
## [1] "USERA - INF.MNG :  0.01183380490133"
## [1] "USERA - INF.UX :  0.00310787721493977"
## [1] "USERA - INF.DEV :  0.00708466025786381"
## [1] "USERA - INF.SLF :  0.00162382361741198"
## [1] "USERA - X3.3C :  0.0422436146046841"
## [1] "USERB - INVA :  0.0036006742456467"
## [1] "USERB - INVG :  0.0468498083014208"
## [1] "USERB - INVI :  0.00251543695502021"
## [1] "USERB - USERA :  0.0147671851418938"
## [1] "USERB - USERC :  8.27741867270859e-07"
## [1] "USERB - USERD :  0.00147875266834863"
## [1] "USERB - USERE :  0.00301677745822593"
## [1] "USERB - USERF :  0.00754573091907584"
## [1] "USERB - INF.MNG :  0.0334260803798894"
## [1] "USERB - INF.UX :  0.0012839123771009"
## [1] "USERB - INF.DEV :  1.76837703369515e-06"
## [1] "USERB - INF.SLF :  4.08943029284181e-06"
## [1] "USERB - X3.3B :  0.0401008241631553"
## [1] "USERC - INVA :  0.0312598398747386"
## [1] "USERC - INVE :  0.00602840850829445"
## [1] "USERC - USERA :  4.54691560456411e-05"
## [1] "USERC - USERB :  8.27741867270859e-07"
## [1] "USERC - USERD :  8.66380357233965e-07"
## [1] "USERC - USERE :  8.19424107234568e-07"
## [1] "USERC - USERF :  2.56253194219624e-08"
## [1] "USERC - INF.MNG :  0.000319903050952863"
## [1] "USERC - INF.DEV :  0.000281923057243727"
## [1] "USERC - INF.SLF :  1.14681810714501e-08"
## [1] "USERC - X3.3B :  0.0270920551486193"
## [1] "USERD - INVA :  0.0199488647352783"
## [1] "USERD - INVE :  0.0213030996404555"
## [1] "USERD - INVH :  0.0337626978285754"
## [1] "USERD - USERA :  0.000301950446309762"
## [1] "USERD - USERB :  0.00147875266834863"
## [1] "USERD - USERC :  8.66380357233965e-07"
## [1] "USERD - USERE :  1.06399908217369e-07"
## [1] "USERD - USERF :  5.32056620983212e-11"
## [1] "USERD - INF.MNG :  0.0188418689054513"
## [1] "USERD - INF.UX :  0.000371729940110477"
## [1] "USERD - INF.DEV :  0.01249398339277"
## [1] "USERD - INF.SLF :  0.00129820106570566"
## [1] "USERE - INVB :  0.0480961857970503"
## [1] "USERE - INVH :  0.00422267591765935"
## [1] "USERE - USERA :  0.00345745114598728"
## [1] "USERE - USERB :  0.00301677745822593"
## [1] "USERE - USERC :  8.19424107234568e-07"
## [1] "USERE - USERD :  1.06399908217369e-07"
## [1] "USERE - USERF :  2.65121258280487e-12"
## [1] "USERE - INF.MNG :  9.9427145272557e-06"
## [1] "USERE - INF.UX :  0.00796522493967777"
## [1] "USERE - INF.DEV :  0.00075510222656483"
## [1] "USERE - INF.SLF :  0.000937177695052416"
## [1] "USERE - X3.3D :  0.035609753575728"
## [1] "USERF - INVH :  0.031166497787726"
## [1] "USERF - USERA :  4.37381683067173e-05"
## [1] "USERF - USERB :  0.00754573091907584"
## [1] "USERF - USERC :  2.56253194219624e-08"
## [1] "USERF - USERD :  5.32056620983212e-11"
## [1] "USERF - USERE :  2.65121258280487e-12"
## [1] "USERF - INF.MNG :  5.55148949468176e-05"
## [1] "USERF - INF.UX :  6.6553414268089e-05"
## [1] "USERF - INF.DEV :  1.54202987576735e-05"
## [1] "USERF - INF.SLF :  0.000128866693251961"
## [1] "USERF - X3.3D :  0.045621714906551"
## [1] "INF.MNG - INVJ :  0.018037167998306"
## [1] "INF.MNG - USERA :  0.01183380490133"
## [1] "INF.MNG - USERB :  0.0334260803798894"
## [1] "INF.MNG - USERC :  0.000319903050952863"
## [1] "INF.MNG - USERD :  0.0188418689054513"
## [1] "INF.MNG - USERE :  9.9427145272557e-06"
## [1] "INF.MNG - USERF :  5.55148949468176e-05"
## [1] "INF.MNG - INF.UX :  0.00218455141592488"
## [1] "INF.MNG - INF.DEV :  3.4459147535415e-08"
## [1] "INF.MNG - INF.SLF :  5.92143742239593e-06"
## [1] "INF.UX - INVJ :  0.000932404587634883"
## [1] "INF.UX - USERA :  0.00310787721493977"
## [1] "INF.UX - USERB :  0.0012839123771009"
## [1] "INF.UX - USERD :  0.000371729940110477"
## [1] "INF.UX - USERE :  0.00796522493967777"
## [1] "INF.UX - USERF :  6.6553414268089e-05"
## [1] "INF.UX - INF.MNG :  0.00218455141592488"
## [1] "INF.UX - INF.DEV :  7.0037015276192e-07"
## [1] "INF.UX - INF.SLF :  0.000454229630998171"
## [1] "INF.DEV - INVJ :  0.0282445092149526"
## [1] "INF.DEV - USERA :  0.00708466025786381"
## [1] "INF.DEV - USERB :  1.76837703369515e-06"
## [1] "INF.DEV - USERC :  0.000281923057243727"
## [1] "INF.DEV - USERD :  0.01249398339277"
## [1] "INF.DEV - USERE :  0.00075510222656483"
## [1] "INF.DEV - USERF :  1.54202987576735e-05"
## [1] "INF.DEV - INF.MNG :  3.4459147535415e-08"
## [1] "INF.DEV - INF.UX :  7.0037015276192e-07"
## [1] "INF.DEV - INF.SLF :  1.16981029307794e-08"
## [1] "INF.SLF - INVA :  0.0412554118716966"
## [1] "INF.SLF - INVB :  0.00873403508279336"
## [1] "INF.SLF - INVE :  0.024334039746599"
## [1] "INF.SLF - INVI :  0.0187977431290931"
## [1] "INF.SLF - INVJ :  0.0284109796666756"
## [1] "INF.SLF - INVK :  0.0125418629938503"
## [1] "INF.SLF - USERA :  0.00162382361741198"
## [1] "INF.SLF - USERB :  4.08943029284181e-06"
## [1] "INF.SLF - USERC :  1.14681810714501e-08"
## [1] "INF.SLF - USERD :  0.00129820106570566"
## [1] "INF.SLF - USERE :  0.000937177695052416"
## [1] "INF.SLF - USERF :  0.000128866693251961"
## [1] "INF.SLF - INF.MNG :  5.92143742239593e-06"
## [1] "INF.SLF - INF.UX :  0.000454229630998171"
## [1] "INF.SLF - INF.DEV :  1.16981029307794e-08"
## [1] "INF.SLF - X3.3C :  0.0130908794779845"
## [1] "X3.3A - INVG :  0.0162703862893996"
## [1] "X3.3A - X3.3B :  3.66904409077051e-08"
## [1] "X3.3A - X3.3C :  0.00177939176236919"
## [1] "X3.3B - USERB :  0.0401008241631553"
## [1] "X3.3B - USERC :  0.0270920551486193"
## [1] "X3.3B - X3.3A :  3.66904409077051e-08"
## [1] "X3.3B - X3.3C :  0.00528980566625714"
## [1] "X3.3B - X3.3D :  0.0310954820639853"
## [1] "X3.3C - USERA :  0.0422436146046841"
## [1] "X3.3C - INF.SLF :  0.0130908794779845"
## [1] "X3.3C - X3.3A :  0.00177939176236919"
## [1] "X3.3C - X3.3B :  0.00528980566625714"
## [1] "X3.3D - INVH :  0.00816333431166094"
## [1] "X3.3D - USERE :  0.035609753575728"
## [1] "X3.3D - USERF :  0.045621714906551"
## [1] "X3.3D - X3.3B :  0.0310954820639853"
## [1] "X3.3D - X3.3F :  0.00726066188896657"
## [1] "X3.3E - INVB :  0.014731131345026"
## [1] "X3.3E - INVC :  0.0193337474844781"
## [1] "X3.3E - INVF :  0.02474032352352"
## [1] "X3.3F - INVC :  0.0418212014638715"
## [1] "X3.3F - INVL :  0.00791216426280061"
## [1] "X3.3F - X3.3D :  0.00726066188896657"

Finding the optimal number of clusters

# k-means starts from random centres and the gap statistic relies on
# bootstrap samples, so fix the RNG seed to make this chunk repeatable.
# NOTE(review): results printed from an earlier, unseeded run may differ
# slightly after re-knitting.
set.seed(123)

# Elbow method: total within-cluster sum of squares per number of clusters.
# The dashed line marks k = 4.
fviz_nbclust(clus_data_scaled, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2) +
  labs(subtitle = "Elbow method")

# Average silhouette width per number of clusters.
fviz_nbclust(clus_data_scaled, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# Gap statistic with 50 bootstrap samples; nstart = 25 is passed on to the
# clustering function.
fviz_nbclust(clus_data_scaled, kmeans, nstart = 25, method = "gap_stat",
             nboot = 50) +
  labs(subtitle = "Gap statistic method")

# Majority vote across the NbClust indices for k = 2..7.
nb <- NbClust(clus_data_scaled, distance = "euclidean", min.nc = 2,
              max.nc = 7, method = "kmeans")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 9 proposed 2 as the best number of clusters 
## * 3 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 2 proposed 5 as the best number of clusters 
## * 4 proposed 6 as the best number of clusters 
## * 4 proposed 7 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
# Plot the NbClust result: how many indices proposed each number of clusters.
fviz_nbclust(nb)
## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 9 proposed  2 as the best number of clusters
## * 3 proposed  3 as the best number of clusters
## * 2 proposed  4 as the best number of clusters
## * 2 proposed  5 as the best number of clusters
## * 4 proposed  6 as the best number of clusters
## * 4 proposed  7 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  2 .

Heatmap of the correlation matrix

# 20-step colour ramp from dark blue through white to dark orange.
make_palette <- colorRampPalette(c("darkblue", "white", "darkorange"))
col <- make_palette(20)

# Heatmap of the correlation matrix; symm = TRUE because the matrix is
# symmetric.
heatmap(
  x = cor_matrix,
  col = col,
  symm = TRUE
)

alternative views:

# Scatterplot matrix with histograms and pairwise correlations.
# chart.Correlation() computes the correlations itself, so it has to be
# given the item data. Passing cor_matrix (as before) plotted correlations
# between columns of correlation coefficients.
# NOTE(review): clus_data_selected still contains NAs; switch to
# clus_data_scaled if a complete-case view is preferred.
suppressWarnings(
  chart.Correlation(clus_data_selected, histogram = TRUE, pch = 19)
)

Hierarchical clustering of observations with company identifiers

# Euclidean distances between the complete-case, standardised observations.
d2 <- dist(clus_data_scaled, method = "euclidean")

# Agglomerative clustering with Ward's criterion ("ward.D2").
hcl2 <- hclust(d2, method = "ward.D2")

# Cut the tree into three groups of observations.
groups2 <- cutree(hcl2, k = 3)

# Dendrogram, with the leaf labels shrunk to half size.
plot(hcl2, cex = 0.5)

Heatmap of the scaled observations, ordered by the hierarchical clustering

# Heatmap of the scaled observations with a side bar showing the cluster
# each row was assigned to.
# One colour per cluster found by cutree().
clusterCols <- rainbow(length(unique(groups2)))
# Colour of each row's cluster, in row order, for the side bar.
myClusterSideBar <- clusterCols[groups2]
# 75-step red/green palette, reversed.
myheatcol <- rev(redgreen(75))
# Rows follow the hcl2 dendrogram; columns keep their original order.
heatmap.2(
  clus_data_scaled,
  main = "Hierarchical Cluster",
  Rowv = as.dendrogram(hcl2),
  Colv = NA,
  dendrogram = "row",
  scale = "row",
  col = myheatcol,
  density.info = "none",
  trace = "none",
  RowSideColors = myClusterSideBar
)

Principal components analysis

# Principal components of the questionnaire items.
# princomp() expects observations in rows. Passing cor_matrix as the data
# (as before) treated the 30 x 30 correlation matrix as 30 observations,
# which is why the last component came out with essentially zero variance.
# Use the complete-case observation matrix instead; cor = TRUE bases the
# analysis on the correlation matrix of those observations.
# NOTE(review): output printed from the earlier call is stale and must be
# regenerated by re-knitting.
pc <- princomp(clus_data_scaled, cor = TRUE)
summary(pc)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     3.1840537 2.4792743 1.7806525 1.50580066 1.08722833
## Proportion of Variance 0.3379399 0.2048934 0.1056908 0.07558119 0.03940218
## Cumulative Proportion  0.3379399 0.5428333 0.6485241 0.72410527 0.76350745
##                            Comp.6     Comp.7     Comp.8     Comp.9
## Standard deviation     1.04840432 0.97569395 0.89673343 0.84882538
## Proportion of Variance 0.03663839 0.03173262 0.02680436 0.02401682
## Cumulative Proportion  0.80014584 0.83187846 0.85868282 0.88269964
##                           Comp.10    Comp.11    Comp.12    Comp.13
## Standard deviation     0.79877547 0.72752661 0.64416638 0.61753041
## Proportion of Variance 0.02126808 0.01764317 0.01383168 0.01271146
## Cumulative Proportion  0.90396771 0.92161088 0.93544256 0.94815402
##                           Comp.14     Comp.15     Comp.16     Comp.17
## Standard deviation     0.57605786 0.497954750 0.459936557 0.386392513
## Proportion of Variance 0.01106142 0.008265298 0.007051388 0.004976639
## Cumulative Proportion  0.95921544 0.967480735 0.974532123 0.979508762
##                            Comp.18     Comp.19     Comp.20     Comp.21
## Standard deviation     0.376864769 0.349848494 0.271638792 0.246975211
## Proportion of Variance 0.004734235 0.004079799 0.002459588 0.002033225
## Cumulative Proportion  0.984242997 0.988322796 0.990782384 0.992815609
##                            Comp.22     Comp.23     Comp.24      Comp.25
## Standard deviation     0.244604714 0.224296828 0.187234720 0.1557082002
## Proportion of Variance 0.001994382 0.001676969 0.001168561 0.0008081681
## Cumulative Proportion  0.994809991 0.996486960 0.997655521 0.9984636895
##                             Comp.26      Comp.27      Comp.28      Comp.29
## Standard deviation     0.1335082979 0.1124799368 0.0962377620 0.0796957195
## Proportion of Variance 0.0005941489 0.0004217245 0.0003087236 0.0002117136
## Cumulative Proportion  0.9990578383 0.9994795628 0.9997882864 1.0000000000
##                             Comp.30
## Standard deviation     2.026708e-08
## Proportion of Variance 1.369182e-17
## Cumulative Proportion  1.000000e+00
# Print the loadings of each variable on each principal component.
loadings(pc)
## 
## Loadings:
##         Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## INVA     0.114  0.340        -0.102  0.173               -0.203       
## INVB    -0.150         0.143  0.348        -0.318        -0.120  0.397
## INVC            0.319        -0.308  0.179               -0.169 -0.155
## INVD            0.330        -0.223  0.135 -0.116                0.180
## INVE           -0.370         0.154                                   
## INVF            0.320               -0.343         0.105 -0.144       
## INVG     0.115  0.268 -0.197 -0.188 -0.112         0.201              
## INVH     0.184 -0.130        -0.299               -0.176  0.390 -0.178
## INVI     0.173        -0.141        -0.429  0.234 -0.204        -0.102
## INVJ     0.226 -0.105                0.381  0.126         0.217       
## INVK     0.190 -0.205        -0.269               -0.240 -0.103       
## INVL           -0.284                0.162 -0.269        -0.336 -0.120
## INVM           -0.216        -0.361         0.190 -0.239 -0.333  0.217
## INVN           -0.196        -0.135                0.653        -0.455
## USERA   -0.267                                            0.286  0.122
## USERB    0.268                0.159 -0.135  0.106                     
## USERC   -0.282               -0.115                                   
## USERD   -0.262        -0.163         0.215                            
## USERE   -0.271        -0.136         0.212  0.101        -0.130       
## USERF   -0.275        -0.155         0.103  0.132        -0.131       
## INF.MNG -0.254                      -0.278  0.135               -0.153
## INF.UX  -0.269               -0.113 -0.243        -0.234              
## INF.DEV -0.284               -0.159 -0.182                            
## INF.SLF -0.300                                                        
## X3.3A          -0.106  0.470                0.311        -0.123       
## X3.3B                 -0.456  0.249        -0.241 -0.126        -0.176
## X3.3C   -0.127  0.101  0.376         0.105  0.308 -0.143        -0.165
## X3.3D           0.129 -0.295  0.318         0.366        -0.358 -0.210
## X3.3E                  0.293  0.127        -0.376 -0.401        -0.551
## X3.3F           0.214 -0.229  0.207  0.305  0.277 -0.164  0.386       
##         Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## INVA                             0.182          -0.216           0.101 
## INVB     0.426   0.230          -0.115   0.167           0.143  -0.163 
## INVC             0.128  -0.129  -0.105                  -0.204   0.105 
## INVD    -0.117   0.186  -0.121                   0.235   0.122  -0.445 
## INVE    -0.136  -0.109                                                 
## INVF     0.268           0.241  -0.245          -0.351           0.104 
## INVG    -0.150  -0.113   0.433           0.104   0.199   0.277   0.183 
## INVH     0.395           0.151           0.293          -0.138   0.116 
## INVI             0.629   0.196  -0.127  -0.145           0.109  -0.194 
## INVJ             0.111   0.175  -0.472  -0.298   0.227  -0.238   0.126 
## INVK     0.129  -0.102                   0.391  -0.189          -0.247 
## INVL    -0.154           0.533   0.169           0.177   0.109  -0.122 
## INVM     0.210  -0.224  -0.164          -0.329           0.227  -0.142 
## INVN     0.222   0.123  -0.235                           0.285         
## USERA            0.156   0.211   0.261  -0.277  -0.442          -0.237 
## USERB   -0.375                   0.113   0.252                         
## USERC                    0.230   0.111  -0.118  -0.169  -0.193         
## USERD   -0.115   0.289  -0.154           0.134           0.221   0.160 
## USERE                    0.106  -0.276   0.280                         
## USERF            0.149          -0.180   0.160  -0.169           0.174 
## INF.MNG -0.252  -0.248          -0.262   0.221   0.130  -0.145  -0.409 
## INF.UX                  -0.118                   0.209   0.354   0.360 
## INF.DEV  0.105                          -0.107   0.314                 
## INF.SLF                          0.155           0.121  -0.267         
## X3.3A           -0.132   0.172   0.131  -0.133  -0.134   0.101   0.154 
## X3.3B                   -0.171   0.289                  -0.194         
## X3.3C    0.159   0.212           0.387   0.273   0.316  -0.107         
## X3.3D    0.310  -0.128                           0.140  -0.167  -0.136 
## X3.3E                           -0.195  -0.186           0.207  -0.167 
## X3.3F           -0.260   0.160                           0.367  -0.177 
##         Comp.18 Comp.19 Comp.20 Comp.21 Comp.22 Comp.23 Comp.24 Comp.25
## INVA             0.233  -0.222   0.143   0.391   0.251  -0.234  -0.145 
## INVB             0.182   0.174   0.176                           0.109 
## INVC    -0.161           0.303   0.516  -0.272  -0.206           0.139 
## INVD                    -0.146  -0.315   0.104           0.380   0.243 
## INVE    -0.222           0.124                   0.127           0.175 
## INVF    -0.113  -0.272  -0.216  -0.191  -0.350                         
## INVG     0.226   0.108   0.445  -0.174                  -0.231         
## INVH     0.138   0.411                                   0.227   0.158 
## INVI    -0.223                                  -0.175                 
## INVJ     0.193  -0.172          -0.117                  -0.103         
## INVK            -0.503   0.214  -0.108   0.239  -0.193  -0.101         
## INVL            -0.120  -0.302   0.273           0.177   0.194         
## INVM             0.280          -0.115  -0.307          -0.152         
## INVN            -0.102                                                 
## USERA    0.414  -0.150   0.127   0.198  -0.131   0.187                 
## USERB    0.249                                   0.162  -0.189         
## USERC   -0.366   0.139   0.271  -0.387   0.142           0.176  -0.295 
## USERD                    0.163          -0.120   0.143           0.131 
## USERE    0.320   0.109  -0.228                  -0.330          -0.461 
## USERF                   -0.241  -0.163           0.100           0.558 
## INF.MNG          0.224           0.138           0.164                 
## INF.UX   0.148  -0.307           0.170   0.227   0.141   0.307  -0.191 
## INF.DEV         -0.118  -0.298           0.124  -0.171  -0.411   0.109 
## INF.SLF                          0.107   0.212          -0.299   0.158 
## X3.3A                                    0.126  -0.542   0.208   0.236 
## X3.3B    0.195          -0.164  -0.158  -0.270  -0.341                 
## X3.3C           -0.126          -0.175  -0.345   0.156  -0.103  -0.143 
## X3.3D    0.187           0.202           0.238   0.157   0.252   0.144 
## X3.3E    0.173                  -0.170                  -0.164         
## X3.3F   -0.372                   0.142                                 
##         Comp.26 Comp.27 Comp.28 Comp.29 Comp.30
## INVA     0.358          -0.131   0.133  -0.308 
## INVB     0.127           0.231          -0.135 
## INVC             0.162          -0.184         
## INVD                    -0.157          -0.182 
## INVE             0.352  -0.495  -0.168  -0.496 
## INVF            -0.167                  -0.226 
## INVG                             0.114         
## INVH            -0.141          -0.180         
## INVI                             0.111         
## INVJ     0.106           0.203   0.109  -0.267 
## INVK     0.220                                 
## INVL                                           
## INVM                                    -0.172 
## INVN                     0.147          -0.127 
## USERA    0.126   0.120                         
## USERB   -0.310           0.387  -0.397  -0.265 
## USERC                    0.386  -0.182  -0.124 
## USERD    0.267  -0.632  -0.117  -0.190         
## USERE   -0.179   0.126  -0.255  -0.125  -0.111 
## USERF            0.370   0.183   0.226   0.173 
## INF.MNG  0.278  -0.112   0.199   0.282  -0.141 
## INF.UX           0.125   0.174          -0.260 
## INF.DEV  0.274           0.105  -0.520         
## INF.SLF -0.546  -0.336           0.317  -0.244 
## X3.3A    0.167  -0.130                  -0.189 
## X3.3B    0.215           0.193   0.147  -0.265 
## X3.3C            0.161           0.109         
## X3.3D                           -0.184         
## X3.3E                                          
## X3.3F   -0.100           0.116          -0.157 
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.033  0.033  0.033  0.033  0.033  0.033  0.033  0.033
## Cumulative Var  0.033  0.067  0.100  0.133  0.167  0.200  0.233  0.267
##                Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## SS loadings     1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var  0.033   0.033   0.033   0.033   0.033   0.033   0.033
## Cumulative Var  0.300   0.333   0.367   0.400   0.433   0.467   0.500
##                Comp.16 Comp.17 Comp.18 Comp.19 Comp.20 Comp.21 Comp.22
## SS loadings      1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var   0.033   0.033   0.033   0.033   0.033   0.033   0.033
## Cumulative Var   0.533   0.567   0.600   0.633   0.667   0.700   0.733
##                Comp.23 Comp.24 Comp.25 Comp.26 Comp.27 Comp.28 Comp.29
## SS loadings      1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var   0.033   0.033   0.033   0.033   0.033   0.033   0.033
## Cumulative Var   0.767   0.800   0.833   0.867   0.900   0.933   0.967
##                Comp.30
## SS loadings      1.000
## Proportion Var   0.033
## Cumulative Var   1.000
# Scree plot of the component variances, drawn as a line chart.
# Author's tentative reading of the plot: about 3 main components.
screeplot(pc, type = "lines")

# Print the scores matrix stored on the fitted object (exact-name lookup).
print(pc[["scores"]])
##               Comp.1      Comp.2      Comp.3      Comp.4      Comp.5
## INVA     2.831152290  4.77279363  0.30845305 -0.46613008  0.98739139
## INVB    -1.379029636 -0.17514652  1.57851215  3.21801748  0.38584706
## INVC     0.959867130  4.07703723  0.29965564 -2.24813572  0.94267387
## INVD     1.488516339  4.24265955  1.12151973 -1.46777907  0.72795728
## INVE    -0.103974375 -5.91299105  0.35843192  1.86524207 -0.65590760
## INVF     1.316726933  4.00592294  0.92713235  0.05991851 -2.12715983
## INVG     2.719313801  3.24290345 -1.94798443 -1.22845583 -0.72503958
## INVH     3.866616310 -1.92487358 -0.05390742 -2.18402659 -0.28786675
## INVI     3.445436068 -1.25324820 -1.25272624  0.34115402 -2.52836962
## INVJ     4.701641851 -1.63739710  0.72257651  0.28973497  2.12019978
## INVK     4.117028018 -3.01891536 -0.42416327 -2.10342739  0.47881455
## INVL     2.309909363 -3.70933973 -0.60439490 -0.49257119  0.84099653
## INVM     2.117332377 -2.97710948 -0.59366797 -2.55621918 -0.17716093
## INVN     2.299848043 -2.55354216  0.33740835 -0.76100866  0.01875729
## USERA   -3.577943602 -0.43145262 -0.18748076 -0.23554259 -0.21656299
## USERB    5.896119424  0.20451038  1.01884631  1.82764420 -1.07191999
## USERC   -4.415563250 -1.31630874 -0.39687838 -0.74819208  0.34839043
## USERD   -4.035208855 -1.43298699 -1.58295537  0.07331082  1.30750198
## USERE   -4.077114586  0.04716503 -1.24776557 -0.37823996  1.27532901
## USERF   -4.348561266 -0.73619523 -1.54059605 -0.59629128  0.56885234
## INF.MNG -3.489963782  0.65867308  1.09014145 -0.40080614 -1.71699741
## INF.UX  -3.911857413  0.67804164  0.03242728 -0.57522672 -1.60103286
## INF.DEV -4.586613812  0.47237181 -0.22243760 -1.18121621 -1.26710915
## INF.SLF -5.325163916 -0.37547215 -0.08713402 -0.48906437  0.12568367
## X3.3A    0.006817647 -1.51304353  4.83657953  1.04241160  0.05228136
## X3.3B    1.470455357  0.54362417 -4.30855258  2.51090950 -0.41553139
## X3.3C   -1.290231950  1.13688056  3.66853415  0.64982352  0.57140677
## X3.3D    0.914071768  1.45620258 -2.65604235  2.82507966 -0.02144642
## X3.3E   -0.482239126  0.83804669  2.81065610  1.31211552  0.41115101
## X3.3F    0.562612849  2.59118969 -2.00418762  2.09697119  1.64887019
##              Comp.6      Comp.7      Comp.8        Comp.9     Comp.10
## INVA    -0.06081099  0.09840902 -0.85675144 -0.0008950916  0.06796906
## INVB    -1.97273756  0.55220956 -0.41375493  2.1291422781  1.69016496
## INVC    -0.46773298 -0.33746358 -0.67684441 -0.6064180480 -0.52698857
## INVD    -0.88023942  0.61840164  0.65569360  1.1581185532 -0.67298070
## INVE    -0.78811889  0.41298304  0.62859761  0.2748037165 -0.81462340
## INVF    -0.67489797  0.67907266 -0.55091709  0.3085032681  1.04479886
## INVG    -0.46637197  1.18815610  0.23146105  0.2545315161 -0.72967016
## INVH    -0.20047936 -0.80841802  2.02966745 -0.6369541027  1.58584256
## INVI     1.20359691 -0.97026058 -0.05847765 -0.3007174965 -0.15409412
## INVJ     0.53894003  0.31344673  1.28816163  0.6351430539 -0.31141596
## INVK    -0.18221377 -1.20369060 -0.38133739  0.3887538639  0.52102416
## INVL    -1.62529699  0.08571437 -1.56038626 -0.4485880124 -0.67997688
## INVM     0.95989603 -1.09716023 -1.48085594  1.3041111187  0.76112688
## INVN    -0.06459077  3.54424682  0.39297759 -1.8212128247  0.86109445
## USERA    0.39175995 -0.05598646  1.54736047  0.7413947369 -0.15086267
## USERB    0.45982497  0.02676292 -0.27855294  0.8053983739 -1.92330366
## USERC   -0.47737880  0.51751403 -0.16013601 -0.0843482495 -0.36415392
## USERD    0.38872909 -0.01817889 -0.33925280 -0.0564150663 -0.55618491
## USERE    0.52068532  0.49569548 -0.59050556  0.2637842284 -0.20602785
## USERF    0.82204813 -0.39550744 -0.64948246 -0.2684493084 -0.46964531
## INF.MNG  0.69002546  0.34596318  0.34278976 -0.5700195122 -1.20632546
## INF.UX  -0.40240376 -1.33824571  0.44638782  0.4841757599  0.22386100
## INF.DEV  0.17290048  0.60869915  0.52478737  0.0127292815  0.41951767
## INF.SLF -0.78031493  0.07420855 -0.06006900  0.2415652903  0.28247843
## X3.3A    1.61114088  0.35345531 -0.46808242  0.1374194048 -0.43839160
## X3.3B   -1.53491941 -0.55048113  0.21433357 -0.5004678307 -0.27097028
## X3.3C    1.63307761 -0.73608920 -0.19487059 -0.6844619739  0.67444425
## X3.3D    1.97962155  0.40163504 -1.56852710 -0.7709262101  1.27702420
## X3.3E   -2.21229399 -2.02062624 -0.04425482 -2.3594005626 -0.08946972
## X3.3F    1.41855515 -0.78446549  2.03084091 -0.0303001547  0.15573868
##             Comp.11      Comp.12      Comp.13      Comp.14     Comp.15
## INVA     0.09838343 -0.353850709  0.621404439 -0.195719049 -0.61031585
## INVB     0.71226930 -0.005524616 -0.413657765  0.467701331  0.17290488
## INVC     0.35845136 -0.608352227 -0.383166285 -0.203498325  0.05961091
## INVD     0.58131372 -0.687619706  0.279576826 -0.213606201  0.60723608
## INVE    -0.70112840 -0.486975556 -0.087636300  0.172158334  0.06195715
## INVF    -0.44324437  0.586665799 -0.867475291  0.152801903 -0.99148536
## INVG    -0.57676876  1.395335355  0.244520501  0.301638495  0.52239399
## INVH     0.17667143  0.322314355  0.115438787  0.890900765  0.01843435
## INVI     2.38750103  0.559063354 -0.427388557 -0.522969113  0.15279354
## INVJ     0.21797518  0.305652073 -1.602108081 -1.033025233  0.58261793
## INVK    -0.55025047 -0.353885747  0.004526343  1.156158832 -0.52248061
## INVL    -0.27335079  1.739050693  0.546830813 -0.003552179  0.48711654
## INVM    -1.03752216 -0.808362009  0.170246322 -1.095665723  0.04327917
## INVN     0.33946081 -1.084300010  0.231135441 -0.191415821 -0.24456475
## USERA    0.54627975  0.604758060  0.869933654 -0.883420074 -1.23889302
## USERB    0.14674921 -0.576508352  0.409985097  0.704553666 -0.27523920
## USERC    0.20780010  0.717682543  0.405347590 -0.417298203 -0.51041419
## USERD    1.14373794 -0.726491197 -0.106876569  0.393675586 -0.02537341
## USERE    0.17693583  0.309875718 -0.937345976  0.886811678 -0.26420765
## USERF    0.55774136 -0.004682708 -0.629123751  0.485901636 -0.47598340
## INF.MNG -1.12135859 -0.366326344 -0.896276775  0.635754550  0.28545867
## INF.UX  -0.24471616 -0.645157594 -0.095361911 -0.221982230  0.49646757
## INF.DEV -0.42402148  0.078734698  0.051073425 -0.345885534  0.86303551
## INF.SLF -0.21767478  0.007527774  0.605582998 -0.231940908  0.32420154
## X3.3A   -0.66935807  0.326473209  0.439009489 -0.485629072 -0.37025930
## X3.3B   -0.09841495 -0.889735090  0.911616440 -0.224114732  0.07879089
## X3.3C    0.75867636  0.230924270  1.261887249  0.847431010  0.84690468
## X3.3D   -0.58556851  0.084015371 -0.219676462 -0.344684593  0.39227063
## X3.3E   -0.22492016 -0.003820452 -0.667388292 -0.604499961 -0.23891366
## X3.3F   -1.24164914  0.333519045  0.165366601  0.123419167 -0.22734361
##             Comp.16     Comp.17      Comp.18     Comp.19      Comp.20
## INVA    -0.26457073  0.33585517 -0.241042971  0.46391231 -0.336104657
## INVB     0.27446881 -0.23462800 -0.073094843  0.39235423  0.250249870
## INVC    -0.55411918  0.28422497 -0.365863559 -0.08570465  0.447917641
## INVD     0.25879927 -0.82399179 -0.137409275 -0.09050311 -0.221532042
## INVE    -0.32076752  0.34388097 -0.592010711  0.04347945  0.123225843
## INVF    -0.21916072  0.32630580 -0.308982014 -0.50197757 -0.308126099
## INVG     0.65280497  0.44992454  0.406270182  0.21212115  0.658067374
## INVH    -0.36793460  0.31346165  0.229001811  0.81530806 -0.106238465
## INVI     0.20136199 -0.38801635 -0.511004552  0.06012134  0.003110075
## INVJ    -0.70036398  0.41130095  0.291749924 -0.29814441 -0.085985585
## INVK    -0.23788242 -0.46245664 -0.060506828 -0.93343442  0.294698596
## INVL     0.30204398 -0.24804161 -0.092696559 -0.22770267 -0.471259022
## INVM     0.47979991 -0.21251808  0.059921163  0.58218396  0.019432656
## INVN     0.66711877 -0.10502826  0.050760288 -0.18882423 -0.019645873
## USERA   -0.17876145 -0.46321642  0.820525331 -0.26944843  0.174538905
## USERB   -0.05634242  0.06768326  0.382874623  0.01505327 -0.080716876
## USERC   -0.50538010  0.10381546 -0.788438418  0.28164249  0.386704762
## USERD    0.55915054  0.35036826 -0.075936651 -0.11702849  0.245712640
## USERE    0.11101491 -0.16357285  0.648444995  0.20871361 -0.329351180
## USERF    0.20247674  0.34937587 -0.029528049  0.19119128 -0.364934990
## INF.MNG -0.40302700 -0.78496425  0.008504698  0.45333661  0.108765273
## INF.UX   0.83898438  0.85484384  0.240461138 -0.53007145 -0.019251953
## INF.DEV -0.21433354 -0.07334574 -0.128374406 -0.21417651 -0.455065590
## INF.SLF -0.77597838 -0.08206468  0.034144016 -0.16234005  0.028213825
## X3.3A    0.20653705  0.43166044  0.060805259  0.14366412 -0.052415089
## X3.3B   -0.58438370  0.20672135  0.310385844  0.09982061 -0.265745985
## X3.3C   -0.27659376  0.11172064  0.030246464 -0.23328744  0.019903223
## X3.3D   -0.43244287 -0.27029431  0.333965179 -0.14004646  0.319913556
## X3.3E    0.51496747 -0.34343664  0.353929055  0.12334779  0.143232945
## X3.3F    0.82251357 -0.28556753 -0.857101133 -0.09356040 -0.107313777
##              Comp.21      Comp.22     Comp.23       Comp.24      Comp.25
## INVA     0.192424831  0.526735072  0.32058461 -0.2138462295 -0.172506936
## INVB     0.246748180 -0.003799826 -0.05035590 -0.0741004539  0.051390866
## INVC     0.679967109 -0.378658498 -0.24990458  0.0957067537  0.095316001
## INVD    -0.429567463  0.126423167 -0.01807699  0.3962371690  0.160680310
## INVE     0.030551677 -0.070598845  0.16414573  0.0002149144  0.045126109
## INVF    -0.262683472 -0.455374188  0.11680265  0.0463403748  0.027860672
## INVG    -0.225589513  0.004364650 -0.10271355 -0.2254965042  0.074107382
## INVH     0.073626861  0.102345005  0.04601934  0.2375993276  0.106924066
## INVI     0.115937771  0.126187425 -0.23360866 -0.0843088585 -0.083330193
## INVJ    -0.153685799 -0.026569810  0.14224634 -0.0856085966 -0.078387065
## INVK    -0.147658765  0.325121345 -0.22780127 -0.0941097945 -0.005106212
## INVL     0.384207249 -0.120584438  0.22538868  0.2090042644  0.016922150
## INVM    -0.159416309 -0.413409341  0.08982140 -0.1434978467 -0.055419775
## INVN     0.089645095 -0.021405559  0.01526874 -0.0534434355 -0.069459079
## USERA    0.275504944 -0.190940303  0.22818979  0.0271559669  0.065419679
## USERB    0.042188972  0.117786462  0.17980760 -0.1763073315 -0.027528957
## USERC   -0.513099632  0.187451515  0.04212705  0.1962611275 -0.282301170
## USERD   -0.107937755 -0.169238431  0.17566216 -0.0403035437  0.078145762
## USERE    0.036657387 -0.053011922 -0.39462551  0.0329397183 -0.408021322
## USERF   -0.211610054  0.076497782  0.12112920 -0.0398911212  0.484364862
## INF.MNG  0.177253325 -0.100889290  0.22013842  0.1028026851 -0.065338164
## INF.UX   0.213221135  0.311490272  0.18370160  0.3299926209 -0.221201114
## INF.DEV  0.005169748  0.137271173 -0.20017670 -0.4052923270  0.077059624
## INF.SLF  0.158994609  0.285736507 -0.08311084 -0.2788780620  0.099958103
## X3.3A    0.031810148  0.172383810 -0.65670035  0.2269153372  0.153476654
## X3.3B   -0.219088888 -0.334141990 -0.41103155  0.1057620323 -0.039067064
## X3.3C   -0.249420236 -0.481608313  0.19936158 -0.0954310684 -0.129221615
## X3.3D   -0.014827398  0.320932563  0.20364362  0.2845506900  0.121919382
## X3.3E   -0.258587171  0.100383209  0.01597969 -0.1956370405  0.012296292
## X3.3F    0.199263414 -0.100879203 -0.06191230 -0.0853307684 -0.034079248
##              Comp.26      Comp.27      Comp.28       Comp.29       Comp.30
## INVA     0.231320805  0.017527270 -0.110087642  0.0681087559  6.522560e-16
## INVB     0.087766688  0.051203773  0.114662042  0.0189312987  5.800915e-15
## INVC    -0.060680309  0.101713313  0.028392339 -0.0806389611  3.851086e-15
## INVD    -0.061031547  0.018860477 -0.112256783 -0.0208744792 -7.188694e-15
## INVE    -0.007096329  0.177998096 -0.302973539 -0.0408435967 -5.273559e-16
## INVF    -0.063629302 -0.130008839 -0.053262558 -0.0023371005 -4.017620e-15
## INVG     0.040941531  0.048187887 -0.062996836  0.0581473719  2.775558e-16
## INVH    -0.055974107 -0.104903381 -0.004120704 -0.0786110080 -2.803313e-15
## INVI    -0.018662160 -0.005144985 -0.061818022  0.0648694814 -6.245005e-16
## INVJ     0.046142365 -0.023889233  0.074232062  0.0668999501  3.124237e-15
## INVK     0.158594113  0.035128780  0.027358937  0.0399996866  2.262079e-15
## INVL     0.041115228 -0.055104422  0.042868840 -0.0003555607  1.998401e-15
## INVM    -0.078856398  0.001031558  0.003207370  0.0017212211 -1.942890e-15
## INVN    -0.077663787  0.047747813  0.071146841  0.0531186572  8.604228e-16
## USERA    0.101976720  0.078917879 -0.037594245 -0.0241750340  1.304512e-15
## USERB   -0.245283711 -0.041017193  0.161064830 -0.1474116624  2.206568e-15
## USERC   -0.053263293  0.038126658  0.182431984 -0.0715124860  1.984524e-15
## USERD    0.188108671 -0.383240378 -0.067493916 -0.0743182012 -6.064593e-15
## USERE   -0.129974979  0.075338556 -0.142382874 -0.0511007719 -2.518818e-15
## USERF   -0.060063177  0.231224126  0.104522925  0.0906141408  3.705369e-15
## INF.MNG  0.186812767 -0.086275400  0.081804016  0.1350053234  1.658396e-15
## INF.UX  -0.037481751  0.061402350  0.057091928  0.0449003764  5.561523e-15
## INF.DEV  0.210111947  0.042049622  0.058473722 -0.2191280867 -4.669876e-15
## INF.SLF -0.401510629 -0.208742321 -0.057545780  0.1410491098 -1.637579e-15
## X3.3A    0.104994274 -0.102601228 -0.006020209  0.0299893297 -2.053913e-15
## X3.3B    0.136861801 -0.002328814  0.067938885  0.0848584242  6.661338e-16
## X3.3C    0.002752827  0.116479003 -0.014465584  0.0523909185 -7.459311e-17
## X3.3D   -0.036424933  0.013142838 -0.052027733 -0.0916977037 -4.163336e-16
## X3.3E   -0.040574223  0.010514039 -0.033038616 -0.0550159374 -2.563921e-15
## X3.3F   -0.109329104 -0.023337843  0.042888321  0.0074165436  2.220446e-15
# Biplot of the fitted component object.
biplot(x = pc)

# Eigenvalue table: eigenvalue, percent of variance and cumulative percent
# of variance for each dimension.
get_eigenvalue(X = pc)
##          eigenvalue variance.percent cumulative.variance.percent
## Dim.1  1.013820e+01     3.379399e+01                    33.79399
## Dim.2  6.146801e+00     2.048934e+01                    54.28333
## Dim.3  3.170723e+00     1.056908e+01                    64.85241
## Dim.4  2.267436e+00     7.558119e+00                    72.41053
## Dim.5  1.182065e+00     3.940218e+00                    76.35074
## Dim.6  1.099152e+00     3.663839e+00                    80.01458
## Dim.7  9.519787e-01     3.173262e+00                    83.18785
## Dim.8  8.041308e-01     2.680436e+00                    85.86828
## Dim.9  7.205045e-01     2.401682e+00                    88.26996
## Dim.10 6.380423e-01     2.126808e+00                    90.39677
## Dim.11 5.292950e-01     1.764317e+00                    92.16109
## Dim.12 4.149503e-01     1.383168e+00                    93.54426
## Dim.13 3.813438e-01     1.271146e+00                    94.81540
## Dim.14 3.318427e-01     1.106142e+00                    95.92154
## Dim.15 2.479589e-01     8.265298e-01                    96.74807
## Dim.16 2.115416e-01     7.051388e-01                    97.45321
## Dim.17 1.492992e-01     4.976639e-01                    97.95088
## Dim.18 1.420271e-01     4.734235e-01                    98.42430
## Dim.19 1.223940e-01     4.079799e-01                    98.83228
## Dim.20 7.378763e-02     2.459588e-01                    99.07824
## Dim.21 6.099675e-02     2.033225e-01                    99.28156
## Dim.22 5.983147e-02     1.994382e-01                    99.48100
## Dim.23 5.030907e-02     1.676969e-01                    99.64870
## Dim.24 3.505684e-02     1.168561e-01                    99.76555
## Dim.25 2.424504e-02     8.081681e-02                    99.84637
## Dim.26 1.782447e-02     5.941489e-02                    99.90578
## Dim.27 1.265174e-02     4.217245e-02                    99.94796
## Dim.28 9.261707e-03     3.087236e-02                    99.97883
## Dim.29 6.351408e-03     2.117136e-02                   100.00000
## Dim.30 4.107545e-16     1.369182e-15                   100.00000

Factor analysis

How many factors?

library(nFactors)

# Eigen-decomposition of the correlation matrix; its eigenvalues are the
# observed values that the scree criteria below are applied to.
ev <- eigen(cor_matrix)

# Drop incomplete rows once and reuse the result for both dimensions
# (previously na.omit() was evaluated twice on the same data).
complete_rows <- na.omit(clus_data_selected)

# parallel() simulates random data sets, so fix the seed: otherwise the
# reference eigenvalues (and possibly the suggested number of factors)
# change every time the document is knitted.
set.seed(2017)
ap <- parallel(
  subject = nrow(complete_rows),
  var = ncol(complete_rows),
  rep = 100,
  cent = 0.05
)

# Compare the observed eigenvalues with the simulated centile eigenvalues
# and plot the resulting scree-test solutions.
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)

Training sets

# number of rows in the dataset 
n <- nrow(clus_data)
n
## [1] 130
# fix the RNG seed so the random split below is identical on every run
set.seed(2017)
# choose randomly 80% of the rows (rounded down to a whole number of rows)
ind <- sample(n, size = floor(n * 0.8))
# create train set
training <- clus_data[ind, ]
# create test set
testing <- clus_data[-ind, ]
# save the correct classes of the held-out rows BEFORE COMPANY is dropped.
# This used to read clus_data$COMPANY, i.e. the labels of all n rows, so it
# did not line up with the rows of `testing`.
test_classes <- testing$COMPANY
# remove the company variable from test data
testing <- dplyr::select(testing, -COMPANY)
# classes of the training rows, stored as a factor
train_classes <- factor(training$COMPANY)
# NOTE(review): despite its name, train.def holds the COMPANY labels of the
# held-out (test) rows, i.e. the same rows as test_classes. It is kept
# unchanged in case later code relies on it.
train.def <- clus_data$COMPANY[-ind]
# company_pred <- knn(train = training, test = testing, cl = train_classes, k = 3)
# The call above did not work as originally written: it passed `train_def`,
# which is never defined (the existing object is `train.def`, and it holds
# test-row labels, not training labels).
# NOTE(review): knn() presumably also fails on this data because of the NAs
# and the non-numeric columns, and `training` still contains COMPANY while
# `testing` does not -- confirm before enabling.

# remove the company variable from test data
# test <- dplyr::select(test, -COMPANY)
# linear discriminant analysis
# lda.fit <- lda(age_range ~ , data = train)
# print the lda.fit object
# lda.fit